// Data Preparation
// ----------------------------
set more off
** Import Transaction Data from DataSetup*.do
use "${Data_Clean}/EVSales_Panel_Data_distance_matched_CBG_placebo.dta", clear
sort groupid group_pair_id year month adjacent

* Drop unnecessay variables
drop rowid colid eiaid fixedcharge startdate enddate manually_ WkdyAvgEVRateT2-WkdyAvgEVRateT4 ///
	EVRateOffPeakTier2-EVRateOffPeakTier4 EVRatePeakTier2-EVRatePeakTier4 EVRateShoulderTier2-EVRateShoulderTier4 ///
	pastlastupdate lastlastupdate_EV 
	
rename SellPrice1 P_BEV
rename SaleNumber1 Q_BEV
rename SellPrice2 P_PHEV
rename SaleNumber2 Q_PHEV
rename zipcode_primary zip
rename mean_income income
rename census_bg_area area
rename retailaverage P_gas

gen PQ_BEV = P_BEV*Q_BEV
gen PQ_PHEV = P_PHEV*Q_PHEV

* Generate auto class shares
gen hybrid_share = hybrid_count / total_car_count
gen luxury_share = luxury_count / total_car_count
gen mud_hh_share = total_mud / total_hh

* Generate price variable for electricity rates
gen ActivePrice = HighestTierAvgRate
replace ActivePrice = AvgRateTier1 if (utility_name == "Pacific Gas & Electric Company" & (year == 2017 & month>=1))
replace ActivePrice = AvgRateTier1 if (utility_name == "San Diego Gas & Electric" & (year == 2017 | (year==2016 & month>=7)))
replace ActivePrice = AvgRateTier1 if (utility_name == "Southern California Edison" & (year == 2017 & month>=1))

* Look at average price over the sample period
tab modate utility_name if (utility_name == "Pacific Gas & Electric Company"  | utility_name == "San Diego Gas & Electric" |utility_name == "Southern California Edison"), sum(ActivePrice) nost nofreq

* Add kWh cutoff
local kwhcutoff = 750
foreach kwhcutoff in 600 700 750 800 900 1000 {
gen KWH`kwhcutoff'MP = AvgRateTier0
forvalues x = 1 / 4 {	
	local y = `x' - 1
	replace KWH`kwhcutoff'MP = AvgRateTier`x' if (`kwhcutoff' > Tier`y'UsageAmt & `kwhcutoff' <= Tier`x'UsageAmt)
}
}

* Add average prices for different levels of consumption
foreach kwhcutoff in 600 750 900 {
gen KWH`kwhcutoff'AP = AvgRateTier0
replace KWH`kwhcutoff'AP = AvgRateTier0*Tier0UsageAmt + AvgRateTier1*(`kwhcutoff'-Tier0UsageAmt) if (`kwhcutoff' > Tier0UsageAmt & `kwhcutoff' <= Tier1UsageAmt)
replace KWH`kwhcutoff'AP = AvgRateTier0*Tier0UsageAmt + AvgRateTier1*Tier1UsageAmt +  AvgRateTier2*(`kwhcutoff'-Tier1UsageAmt) if (`kwhcutoff' > Tier1UsageAmt & `kwhcutoff' <= Tier2UsageAmt)
replace KWH`kwhcutoff'AP =  AvgRateTier0*Tier0UsageAmt + AvgRateTier1*Tier1UsageAmt + AvgRateTier2*Tier2UsageAmt + AvgRateTier3*(`kwhcutoff'-Tier2UsageAmt) if (`kwhcutoff' > Tier2UsageAmt & `kwhcutoff' <= Tier3UsageAmt)
replace KWH`kwhcutoff'AP = AvgRateTier0*Tier0UsageAmt + AvgRateTier1*Tier1UsageAmt + AvgRateTier2*Tier2UsageAmt + AvgRateTier3*Tier3UsageAmt + AvgRateTier4*(`kwhcutoff'-Tier3UsageAmt) if (`kwhcutoff' > Tier3UsageAmt & `kwhcutoff' <= Tier4UsageAmt)

replace KWH`kwhcutoff'AP=KWH`kwhcutoff'AP/`kwhcutoff'
}

/*Check on this*/
drop if adjacent==.
drop modate number_price_obs number_station_obs climate_zone standard_count-prius_count total_single total_mud

* Spread to make pairs
*total_station included?
reshape wide P_BEV Q_BEV P_PHEV Q_PHEV PQ_BEV PQ_PHEV cbg distance* utility_name income population area fuel_economy_mean pop_density total_hh hybrid_share luxury_share mud_hh_share ///
	AvgRate* Tier* KWH* MaxTier* HighestTier* Wkdy* EVRate* zip region P_gas ActivePrice, i(groupid group_pair_id year month) j(adjacent)

* Make the data annual	
collapse (sum) Q_BEV* Q_PHEV* PQ_PHEV* PQ_BEV* (mean) cbg0 cbg1 distance* income* population* area* fuel_economy_mean* pop_density* total_hh* hybrid_share* luxury_share* mud_hh_share* ///
	AvgRate* Tier* MaxTier*  KWH* HighestTier* Wkdy* EVRate* P_gas* zip* ActivePrice*, by(groupid group_pair_id year utility_name* region*) 
	
*Restrict sample s.t. reference is IOUs only (later we run vice-versa)
* Add the dummies for the three CA IOUs
gen ref_PGE = (utility_name0 == "Pacific Gas & Electric Company")
gen ref_SCE = (utility_name0 == "Southern California Edison")
gen ref_SDGE = (utility_name0 == "San Diego Gas & Electric")
gen adj_PGE = (utility_name1 == "Pacific Gas & Electric Company")
gen adj_SCE = (utility_name1 == "Southern California Edison")
gen adj_SDGE = (utility_name1 == "San Diego Gas & Electric")

gen ref_IOU = ref_PGE + ref_SCE + ref_SDGE
gen adj_IOU = adj_PGE + adj_SCE + adj_SDGE

gen PGE_dum = (ref_PGE == 1)
gen SCE_dum = (ref_SCE == 1)
gen SDGE_dum = (ref_SDGE == 1)

* Top tier prices
rename HighestTierAvgRate0 p_e_high0
rename HighestTierAvgRate1 p_e_high1

rename ActivePrice0 p_e_active0
rename ActivePrice1 p_e_active1

* Shares of BEVs by population
local pop_var "population"
gen share_BEV0 = Q_BEV0/(`pop_var'0/10000)
gen share_BEV1 = Q_BEV1/(`pop_var'1/10000)

*stations PC included?
* Calculate difference variables and condition sample
foreach i in population pop_density income hybrid_share luxury_share mud_hh_share fuel_economy_mean p_e_high p_e_active P_gas KWH750AP KWH750MP KWH600AP KWH600MP KWH900AP KWH900MP share_BEV {
	gen delta_`i' = `i'0-`i'1
	twoway histogram delta_`i'
}

/*Put prices in cents per KWH*/
foreach i of varlist delta_p_e* delta_KWH* {
	replace `i' = `i'*100
}

/*Put dollars and population in 000s*/
foreach i of varlist delta_population delta_income delta_pop_density {
	replace `i' = `i'/1000
}

/*Tag all CBG pairs that appear in appear when matching in both directions*/
gen double first_cbg = cbg0 if cbg0<cbg1
replace first_cbg = cbg1 if cbg1<cbg0
gen double second_cbg = cbg1 if cbg0<cbg1
replace second_cbg = cbg0 if cbg1<cbg0
duplicates tag year first_cbg second_cbg, gen(paircount)
sort year first_cbg second_cbg
gen duplicate_drop = (first_cbg[_n]==first_cbg[_n-1] & second_cbg[_n]==second_cbg[_n-1]&year[_n]==year[_n-1])
/*Generate distance on the IOU / non-IOU side of border*/


/*Set of group ids for each pair of utilities regardless of which is the reference*/
gen first_util = utility_name0 if utility_name0<utility_name1
replace first_util = utility_name1 if utility_name1<utility_name0
gen second_util = utility_name1 if utility_name0<utility_name1
replace second_util = utility_name0 if utility_name1<utility_name0
egen pairid = group(first_util second_util)

/*Generate distance on the IOU / non-IOU side of border*/

gen cons=1
gen IOU_dist = .
gen muni_dist = .

replace IOU_dist = distance_util0 if ref_IOU==1
replace IOU_dist = -distance_util1 if adj_IOU==1

replace muni_dist = -distance_util0 if ref_IOU==0
replace muni_dist = distance_util1 if adj_IOU==0


label var delta_population "$\Delta$ Population (000s)"
label var delta_pop_density "$\Delta$ Pop Density (000s ppl/sqm)"
label var delta_income "$\Delta$ Income (\\$000)"
label var delta_P_gas "$\Delta$ Gas Price (\\$/gal)"
label var delta_p_e_active "$\Delta$ Marg. Price (cents/kwh)"
label var delta_p_e_high "$\Delta$ Marg. Price - Top Tier (cents/kwh)"


foreach kwhcutoff in  600 750 900 {
label var  delta_KWH`kwhcutoff'MP "$\Delta$ Marg. Price at `kwhcutoff' kwh"
label var delta_KWH`kwhcutoff'AP "$\Delta$ Avg Price for `kwhcutoff' kwhs"
}


label var delta_fuel_economy_mean "$\Delta$ Mean Fuel Econ (mpg, 2013)"
label var delta_hybrid_share "$\Delta$ Hybrid Fleet Share (2013)"
label var delta_luxury_share "$\Delta$ Luxury Fleet Share (2013)"
label var delta_mud_hh_share "$\Delta$ MUD HH share (2013)"

/*Put gasoline in cents per gallon*/
foreach i of varlist delta_P_gas {
	replace `i' = `i'*100
	label var delta_P_gas "$\Delta$ Gas Price (cpg)" 
}

save "${Data_Clean}/spatial_RD_regdata_placebo.dta", replace
